# -*- coding: utf-8 -*-
"""
Created on Fri May 31 16:02:50 2024

@author: yys
"""

import os
import random
import pandas as pd

# Seed the RNG so that the same list of files always yields the same split.
random.seed(42)

# Folder that holds the input CSV files.
folder_path = 'D:/guo/RS-BNU/bj/onlyok/new/tk_qingtiannew'
# Alternative input folder (cloud dataset):
# folder_path = 'D:/guo/RS-BNU/bj/onlyok/new/tk_cloud'

# Collect the names of all CSV files in the folder. os.listdir() returns
# entries in arbitrary order, so the list is sorted; without that the fixed
# seed above would not be enough to make the split reproducible.
csv_files = sorted(
    file for file in os.listdir(folder_path) if file.endswith('.csv')
)

# Number of files per group: 70% / 20% / remainder (roughly 10%).
total_files = len(csv_files)
group1_count = int(total_files * 0.7)
group2_count = int(total_files * 0.2)
group3_count = total_files - group1_count - group2_count

# Randomly draw the files of each group without overlap. Sets are used for
# the membership tests so that filtering stays linear in the number of files.
group1_files = random.sample(csv_files, group1_count)
_group1_lookup = set(group1_files)
remaining_files = [file for file in csv_files if file not in _group1_lookup]
group2_files = random.sample(remaining_files, group2_count)
_group2_lookup = set(group2_files)
group3_files = [file for file in remaining_files if file not in _group2_lookup]

# Merge the CSV files of one group into a single DataFrame.
def merge_files(files, folder=None):
    """Read every CSV named in *files* and concatenate them row-wise.

    Args:
        files: Iterable of CSV file names, relative to ``folder``.
        folder: Directory that contains the files. When omitted, the
            module-level ``folder_path`` is used.

    Returns:
        A DataFrame holding the rows of all files in the given order, with a
        fresh 0..n-1 index. If *files* is empty, an empty DataFrame is
        returned.
    """
    names = list(files)
    # pd.concat() raises ValueError on an empty list, which happens when a
    # group receives no files (e.g. very small folders).
    if not names:
        return pd.DataFrame()

    base_dir = folder_path if folder is None else folder
    frames = [pd.read_csv(os.path.join(base_dir, name)) for name in names]
    return pd.concat(frames, ignore_index=True)

# Merge the files of each group into one DataFrame per group.
group1_df = merge_files(group1_files)
group2_df = merge_files(group2_files)
group3_df = merge_files(group3_files)

# Destination of each merged group.
# NOTE(review): xl / yz / cs presumably stand for training / validation /
# test -- confirm with the author.
group1_path = 'D:/guo/RS-BNU/bj/onlyok/new/sunny_xl1.csv'
group2_path = 'D:/guo/RS-BNU/bj/onlyok/new/sunny_yz1.csv'
group3_path = 'D:/guo/RS-BNU/bj/onlyok/new/sunny_cs1.csv'
# For the cloud dataset the outputs were named cloud_xl.csv, cloud_yz.csv and
# cloud_cs.csv in the same directory.

# Save the three DataFrames as CSV files (without the index column).
group1_df.to_csv(group1_path, index=False)
group2_df.to_csv(group2_path, index=False)
group3_df.to_csv(group3_path, index=False)

# Report the files that were actually written; the previous message named
# group1.csv / group2.csv / group3.csv, which are never created.
print(f"按比例分组并保存到 {group1_path}，{group2_path} 和 {group3_path} 文件中。")
